August, 2018
# Closed-form least-squares estimates for the simple regression of y on x.
x_bar <- mean(x)
y_bar <- mean(y)

# Corrected cross-product of x and y, and corrected sum of squares of x.
Sxy <- sum((x - x_bar) * (y - y_bar))
Sxx <- sum((x - x_bar)^2)

# Slope first; the intercept is then obtained from the two sample means.
beta1 <- Sxy / Sxx
beta0 <- y_bar - beta1 * x_bar

# Show both estimates as a named vector.
c(beta0 = beta0, beta1 = beta1)
## beta0 beta1 ## -0.1333333 0.3696970
# Regress count on time and kakao, taking the variables from smash_dat.
smash_fit <- lm(count ~ time + kakao, data = smash_dat)

# Print the coefficient table and overall fit statistics.
summary(smash_fit)
## ## Call: ## lm(formula = count ~ time + kakao, data = smash_dat) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.20 -0.35 0.00 0.35 1.20 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.4000 1.3223 -0.302 0.7711 ## time 0.4000 0.1604 2.494 0.0413 * ## kakao 0.2000 0.9212 0.217 0.8343 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.7171 on 7 degrees of freedom ## Multiple R-squared: 0.7584, Adjusted R-squared: 0.6894 ## F-statistic: 10.99 on 2 and 7 DF, p-value: 0.006933
# `.` expands to every column of simul_dat other than y, and `- 1` removes
# the intercept term that lm() would otherwise add on its own.
# NOTE(review): simul_dat presumably carries its own constant column for the
# intercept -- confirm against where simul_dat is built.
simul_fit <- lm(y ~ . - 1, data = simul_dat)

# Print the coefficient table and overall fit statistics.
summary(simul_fit)
## ## Call: ## lm(formula = y ~ . - 1, data = simul_dat) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.294359 -0.043645 0.000202 0.063692 0.263941 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## intercept 0.50254 0.01052 47.77 <2e-16 *** ## x1 0.99676 0.01601 62.25 <2e-16 *** ## x2 0.99465 0.01095 90.85 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.1043 on 97 degrees of freedom ## Multiple R-squared: 0.9906, Adjusted R-squared: 0.9903 ## F-statistic: 3414 on 3 and 97 DF, p-value: < 2.2e-16
# Same specification as `y ~ . - 1`, but with x2 additionally removed from
# the right-hand side, so the fit uses the remaining columns only.
simul_fit2 <- lm(y ~ . - 1 - x2, data = simul_dat)

# Print the coefficient table and overall fit statistics.
summary(simul_fit2)
## ## Call: ## lm(formula = y ~ . - 1 - x2, data = simul_dat) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.8085 -0.6385 -0.1786 0.5147 2.3165 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## intercept 0.465044 0.097034 4.793 5.86e-06 *** ## x1 0.001056 0.107779 0.010 0.992 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.9632 on 98 degrees of freedom ## Multiple R-squared: 0.1922, Adjusted R-squared: 0.1757 ## F-statistic: 11.66 on 2 and 98 DF, p-value: 2.865e-05
# Attach the packages used by the used-car analysis. library() stops with an
# error when a package is missing, whereas require() would only warn and
# return FALSE, deferring the failure to a less obvious place.
library(regbook)
library(leaps)

# Load the usedcars data set into the workspace.
# NOTE(review): presumably shipped with regbook -- confirm.
data("usedcars")

# Preview the first ten rows.
head(usedcars, n = 10)
## price year mileage cc automatic ## 1 790 78 133462 1998 1 ## 2 1380 39 33000 2000 1 ## 3 270 109 120000 1800 0 ## 4 1190 20 69727 1999 1 ## 5 590 70 112000 2000 0 ## 6 1120 58 39106 1998 1 ## 7 815 53 95935 1800 1 ## 8 450 68 120000 1800 0 ## 9 1290 15 20215 1798 1 ## 10 420 96 140000 1800 0
# Fit price on all remaining columns of usedcars.
lm_used_car <- lm(price ~ ., data = usedcars)

# Column-wise summary of the data frame itself (not of the fitted model).
summary(usedcars)
## price year mileage cc ## Min. : 270.0 Min. : 15.0 Min. : 8000 Min. :1798 ## 1st Qu.: 620.0 1st Qu.: 42.0 1st Qu.: 53222 1st Qu.:1800 ## Median : 795.0 Median : 68.0 Median : 76388 Median :1998 ## Mean : 804.6 Mean : 66.0 Mean : 79132 Mean :1923 ## 3rd Qu.: 997.5 3rd Qu.: 81.5 3rd Qu.:113598 3rd Qu.:2000 ## Max. :1380.0 Max. :129.0 Max. :140000 Max. :2000 ## automatic ## Min. :0.0000 ## 1st Qu.:0.0000 ## Median :1.0000 ## Mean :0.5667 ## 3rd Qu.:1.0000 ## Max. :1.0000
# Subset search over the predictors of price; nbest = 4 asks regsubsets()
# to report up to four models for each subset size.
usedcars_reg <- regsubsets(price ~ ., data = usedcars, nbest = 4)
summ_reg <- summary(usedcars_reg)

# Collect the per-model criteria into one data frame, rounded to 3 digits.
tmp <- round(
  data.frame(
    rss = summ_reg[["rss"]],
    rsq = summ_reg[["rsq"]],
    adjr2 = summ_reg[["adjr2"]],
    cp = summ_reg[["cp"]],
    bic = summ_reg[["bic"]]
  ),
  digits = 3
)

# Show the variable-inclusion matrix side by side with the criteria.
data.frame(summ_reg$outmat, tmp)
## year mileage cc automatic rss rsq adjr2 cp bic ## 1 ( 1 ) * 619638.9 0.768 0.760 34.621 -37.089 ## 1 ( 2 ) * 1038891.7 0.612 0.598 75.638 -21.585 ## 1 ( 3 ) * 1826993.4 0.317 0.293 152.740 -4.650 ## 1 ( 4 ) * 2403226.1 0.102 0.070 209.114 3.574 ## 2 ( 1 ) * * 411219.5 0.846 0.835 16.231 -45.988 ## 2 ( 2 ) * * 483775.0 0.819 0.806 23.329 -41.113 ## 2 ( 3 ) * * 544908.8 0.796 0.781 29.310 -37.543 ## 2 ( 4 ) * * 697150.7 0.740 0.720 44.204 -30.151 ## 3 ( 1 ) * * * 293331.9 0.890 0.878 6.697 -52.721 ## 3 ( 2 ) * * * 356186.9 0.867 0.852 12.847 -46.897 ## 3 ( 3 ) * * * 431366.3 0.839 0.820 20.202 -41.152 ## 3 ( 4 ) * * * 654467.4 0.755 0.727 42.028 -28.646 ## 4 ( 1 ) * * * * 255538.1 0.905 0.889 5.000 -53.458
# Attach olsrr with library() so a missing package fails immediately;
# require() would only warn and return FALSE, and the call below would then
# fail with a "could not find function" error instead.
library(olsrr)

# Evaluate the candidate regressions built from the predictors of price.
usedcars_reg2 <- ols_step_all_possible(lm(price ~ ., data = usedcars))

# Request a 2 x 2 panel layout on the base graphics device, then plot.
par(mfrow = c(2, 2))
plot(usedcars_reg2)
# Start the stepwise search from the model with every predictor of price.
usedcars_full <- lm(price ~ ., data = usedcars)

# Search in both directions; k = 2 is the per-parameter penalty, i.e. AIC.
step(usedcars_full, direction = "both", k = 2)
## Start: AIC=281.5 ## price ~ year + mileage + cc + automatic ## ## Df Sum of Sq RSS AIC ## <none> 255538 281.50 ## - cc 1 37794 293332 283.64 ## - mileage 1 100649 356187 289.46 ## - automatic 1 175828 431366 295.20 ## - year 1 398929 654467 307.71
## ## Call: ## lm(formula = price ~ year + mileage + cc + automatic, data = usedcars) ## ## Coefficients: ## (Intercept) year mileage cc automatic ## 525.286961 -5.799637 -0.002263 0.388787 165.312633
## F_value beta0 beta1 r_sq ## Set A 17.99 3 0.5 0.67 ## Set B 17.97 3 0.5 0.67 ## Set C 17.97 3 0.5 0.67 ## Set D 18.00 3 0.5 0.67
영향력 관측치 : 회귀계수와 예측값에 큰 영향을 주는 관측치
# Diagnostic plots for the first three fits stored in lm_result, each drawn
# on its own 2 x 2 panel layout.
for (fit_index in seq_len(3)) {
  par(mfrow = c(2, 2))
  plot(lm_result[[fit_index]])
}

# Fourth fit: same plots, with any warnings raised while plotting silenced.
par(mfrow = c(2, 2))
suppressWarnings(plot(lm_result[[4]]))
# Fit price on all remaining columns of usedcars.
lm_used_car <- lm(price ~ ., data = usedcars)

# Draw the diagnostic plots of the fit on a 2 x 2 panel layout.
par(mfrow = c(2, 2))
plot(lm_used_car)
# Compute the per-observation influence diagnostics for the used-car fit.
infl_res <- influence.measures(lm_used_car)

# Print the matrix of diagnostics with two significant digits.
print(infl_res[["infmat"]], digits = 2)
## dfb.1_ dfb.year dfb.milg dfb.cc dfb.atmt dffit cov.r cook.d hat ## 1 -0.2072 -0.10854 0.32713 0.1793 0.1926 0.4466 1.34 4.0e-02 0.215 ## 2 -0.1877 0.03342 -0.34735 0.2475 0.2344 0.8333 0.46 1.2e-01 0.105 ## 3 -0.1030 -0.08699 0.00892 0.1095 0.0637 -0.2346 1.37 1.1e-02 0.156 ## 4 0.0047 0.01624 -0.01173 -0.0059 -0.0032 -0.0209 1.51 9.1e-05 0.190 ## 5 0.1123 0.10246 -0.13508 -0.1250 0.1346 -0.2463 1.36 1.2e-02 0.158 ## 6 -0.0789 0.13648 -0.18121 0.0871 0.1124 0.2980 1.22 1.8e-02 0.119 ## 7 -0.1423 0.10058 -0.10620 0.1468 -0.1017 -0.2414 1.38 1.2e-02 0.165 ## 8 -0.2769 0.30850 -0.32065 0.2570 0.2499 -0.5807 1.12 6.6e-02 0.187 ## 9 0.1547 -0.06602 -0.05437 -0.1388 0.0315 0.2155 1.59 9.6e-03 0.251 ## 10 0.1309 -0.05621 0.16917 -0.1377 -0.1022 0.3393 1.33 2.3e-02 0.174 ## 11 0.0014 -0.00067 0.00031 -0.0014 -0.0017 -0.0031 1.33 2.0e-06 0.078 ## 12 -0.2788 0.08519 -0.02219 0.3108 -0.3387 0.5660 0.84 6.0e-02 0.113 ## 13 0.1091 -0.02779 0.02870 -0.1277 0.1598 -0.2463 1.25 1.2e-02 0.110 ## 14 0.5293 -0.22998 -0.16659 -0.4775 0.1171 0.7314 1.14 1.0e-01 0.239 ## 15 -0.0243 -0.03730 -0.03217 0.0443 -0.1002 0.1396 1.46 4.0e-03 0.178 ## 16 -0.4718 0.09220 0.10187 0.4546 -0.2912 -0.7000 0.66 8.8e-02 0.113 ## 17 0.0812 -0.11235 0.03078 -0.0685 -0.0996 -0.1780 1.40 6.6e-03 0.153 ## 18 0.1461 0.13855 0.01941 -0.1805 -0.1588 -0.5110 0.79 4.9e-02 0.089 ## 19 0.0877 -0.45430 0.73311 -0.1599 0.3662 -0.8755 1.51 1.5e-01 0.373 ## 20 0.1730 -0.08498 0.00757 -0.1648 0.1028 0.2679 1.24 1.5e-02 0.116 ## 21 0.1476 0.08123 -0.20448 -0.1333 -0.1399 -0.3080 1.29 1.9e-02 0.150 ## 22 -0.2137 -0.01187 0.08413 0.1925 0.1635 -0.3098 1.25 2.0e-02 0.137 ## 23 -0.1280 0.07155 0.08360 0.1051 0.1371 0.2489 1.42 1.3e-02 0.185 ## 24 0.2230 0.43065 -0.10331 -0.2630 -0.0999 0.7305 1.11 1.0e-01 0.229 ## 25 0.0013 0.00035 -0.00069 -0.0013 -0.0014 -0.0027 1.33 1.6e-06 0.079 ## 26 0.0691 0.20498 -0.14114 -0.0858 0.1256 0.2525 1.64 1.3e-02 0.278 ## 27 -0.0813 -0.09304 -0.04816 0.1275 -0.2500 0.3402 1.26 2.3e-02 
0.151 ## 28 0.2853 -0.57118 0.44752 -0.2655 -0.3787 -0.8207 0.69 1.2e-01 0.150 ## 29 0.0263 0.01937 0.01086 -0.0292 -0.0162 0.0705 1.47 1.0e-03 0.171 ## 30 0.0073 0.01673 -0.01021 -0.0102 0.0171 -0.0261 1.51 1.4e-04 0.188